{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "cdN6QOXIUaUq"
      },
      "source": [
        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pinecone-io/examples/blob/master/docs/assets/how-to-create-pinecone-datasets.ipynb)\n",
        "[![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/pinecone-io/examples/blob/master/docs/assets/how-to-create-pinecone-datasets.ipynb)\n",
        "\n",
        "# Creating Pinecone Datasets"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "8Fiobs_oUaUr"
      },
      "source": [
        "This notebook will walk you through the process of creating a Pinecone dataset from a pandas DataFrame."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "DLuQirtzUaUs"
      },
      "source": [
        "## Step 1: create a simple sample dataset"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "id": "bVW2DlVQUaUs",
        "outputId": "bd3c9438-7c67-4097-b580-4bfdd695ab92",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "outputs": [],
      "source": [
        "!pip install -qU pandas==2.0.2"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "id": "fPebr9XNUaUs"
      },
      "outputs": [],
      "source": [
        "import pandas as pd"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {
        "id": "I_WRSqY8UaUs",
        "outputId": "36348ad8-38ef-40b2-8b0c-fc7e34e12575",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        }
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "  id           values                                      sparse_values  \\\n",
              "0  1  [0.1, 0.2, 0.3]  {'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}   \n",
              "1  2  [0.4, 0.5, 0.6]  {'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}   \n",
              "2  3  [0.7, 0.8, 0.9]  {'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}   \n",
              "3  4  [1.0, 1.1, 1.2]  {'indices': [10, 11, 12], 'values': [1.0, 1.1,...   \n",
              "4  5  [1.3, 1.4, 1.5]  {'indices': [13, 14, 15], 'values': [1.3, 1.4,...   \n",
              "\n",
              "                             metadata                                blob  \n",
              "0  {'title': 'title1', 'url': 'url1'}      {'extra_field': 'extra_value'}  \n",
              "1  {'title': 'title2', 'url': 'url2'}                                None  \n",
              "2  {'title': 'title3', 'url': 'url3'}                                None  \n",
              "3  {'title': 'title4', 'url': 'url4'}                                None  \n",
              "4  {'title': 'title5', 'url': 'url5'}  {'another_field': 'another_value'}  "
            ],
            "text/html": [
              "\n",
              "\n",
              "  <div id=\"df-ee9831ef-5516-44bf-8080-3a2a74e6f00c\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>id</th>\n",
              "      <th>values</th>\n",
              "      <th>sparse_values</th>\n",
              "      <th>metadata</th>\n",
              "      <th>blob</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>1</td>\n",
              "      <td>[0.1, 0.2, 0.3]</td>\n",
              "      <td>{'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}</td>\n",
              "      <td>{'title': 'title1', 'url': 'url1'}</td>\n",
              "      <td>{'extra_field': 'extra_value'}</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>2</td>\n",
              "      <td>[0.4, 0.5, 0.6]</td>\n",
              "      <td>{'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}</td>\n",
              "      <td>{'title': 'title2', 'url': 'url2'}</td>\n",
              "      <td>None</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>3</td>\n",
              "      <td>[0.7, 0.8, 0.9]</td>\n",
              "      <td>{'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}</td>\n",
              "      <td>{'title': 'title3', 'url': 'url3'}</td>\n",
              "      <td>None</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>4</td>\n",
              "      <td>[1.0, 1.1, 1.2]</td>\n",
              "      <td>{'indices': [10, 11, 12], 'values': [1.0, 1.1,...</td>\n",
              "      <td>{'title': 'title4', 'url': 'url4'}</td>\n",
              "      <td>None</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>5</td>\n",
              "      <td>[1.3, 1.4, 1.5]</td>\n",
              "      <td>{'indices': [13, 14, 15], 'values': [1.3, 1.4,...</td>\n",
              "      <td>{'title': 'title5', 'url': 'url5'}</td>\n",
              "      <td>{'another_field': 'another_value'}</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-ee9831ef-5516-44bf-8080-3a2a74e6f00c')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "\n",
              "\n",
              "\n",
              "    <div id=\"df-2dbf45c1-fcbc-44df-ae34-b177e0482493\">\n",
              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-2dbf45c1-fcbc-44df-ae34-b177e0482493')\"\n",
              "              title=\"Suggest charts.\"\n",
              "              style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "      </button>\n",
              "    </div>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "    background-color: #E8F0FE;\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: #1967D2;\n",
              "    height: 32px;\n",
              "    padding: 0 0 0 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: #E2EBFA;\n",
              "    box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: #174EA6;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "    background-color: #3B4455;\n",
              "    fill: #D2E3FC;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart:hover {\n",
              "    background-color: #434B5C;\n",
              "    box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "    filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "    fill: #FFFFFF;\n",
              "  }\n",
              "</style>\n",
              "\n",
              "    <script>\n",
              "      async function quickchart(key) {\n",
              "        const containerElement = document.querySelector('#' + key);\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      }\n",
              "    </script>\n",
              "\n",
              "      <script>\n",
              "\n",
              "function displayQuickchartButton(domScope) {\n",
              "  let quickchartButtonEl =\n",
              "    domScope.querySelector('#df-2dbf45c1-fcbc-44df-ae34-b177e0482493 button.colab-df-quickchart');\n",
              "  quickchartButtonEl.style.display =\n",
              "    google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "}\n",
              "\n",
              "        displayQuickchartButton(document);\n",
              "      </script>\n",
              "      <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-ee9831ef-5516-44bf-8080-3a2a74e6f00c button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-ee9831ef-5516-44bf-8080-3a2a74e6f00c');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n"
            ]
          },
          "metadata": {},
          "execution_count": 3
        }
      ],
      "source": [
        "documents = [\n",
        "    {\n",
        "        \"id\": \"1\",\n",
        "        \"values\": [0.1, 0.2, 0.3],\n",
        "        \"sparse_values\": {\"indices\": [1, 2, 3], \"values\": [0.1, 0.2, 0.3]},\n",
        "        \"metadata\": {\"title\": \"title1\", \"url\": \"url1\"},\n",
        "        \"blob\": {\"extra_field\": \"extra_value\"},\n",
        "    },\n",
        "    {\n",
        "        \"id\": \"2\",\n",
        "        \"values\": [0.4, 0.5, 0.6],\n",
        "        \"sparse_values\": {\"indices\": [4, 5, 6], \"values\": [0.4, 0.5, 0.6]},\n",
        "        \"metadata\": {\"title\": \"title2\", \"url\": \"url2\"},\n",
        "        \"blob\": None,\n",
        "    },\n",
        "    {\n",
        "        \"id\": \"3\",\n",
        "        \"values\": [0.7, 0.8, 0.9],\n",
        "        \"sparse_values\": {\"indices\": [7, 8, 9], \"values\": [0.7, 0.8, 0.9]},\n",
        "        \"metadata\": {\"title\": \"title3\", \"url\": \"url3\"},\n",
        "        \"blob\": None,\n",
        "    },\n",
        "    {\n",
        "        \"id\": \"4\",\n",
        "        \"values\": [1.0, 1.1, 1.2],\n",
        "        \"sparse_values\": {\"indices\": [10, 11, 12], \"values\": [1.0, 1.1, 1.2]},\n",
        "        \"metadata\": {\"title\": \"title4\", \"url\": \"url4\"},\n",
        "        \"blob\": None,\n",
        "    },\n",
        "    {\n",
        "        \"id\": \"5\",\n",
        "        \"values\": [1.3, 1.4, 1.5],\n",
        "        \"sparse_values\": {\"indices\": [13, 14, 15], \"values\": [1.3, 1.4, 1.5]},\n",
        "        \"metadata\": {\"title\": \"title5\", \"url\": \"url5\"},\n",
        "        \"blob\": {\"another_field\": \"another_value\"},\n",
        "    }\n",
        "]\n",
        "\n",
        "df = pd.DataFrame(documents)\n",
        "df"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "c_zwxJ_OUaUt"
      },
      "source": [
        "Some notes:\n",
        "* We have both a `metadata` field and a `blob` field. The `metadata` field is the actual Pinecone metadata that we will use in our index; `blob` is an additional field that we can use to store any extra information we want to keep along with the Dataset.\n",
        "* Here we used both `values` and `sparse_values`. However, `sparse_values` is not a mandatory field; if you don't have sparse values, keep it empty."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "BcFx1wFqUaUt"
      },
      "source": [
        "## Pinecone Dataset\n",
        "\n",
        "Now that we have our data ready, we can create a Pinecone Dataset. A Pinecone Dataset is a collection of documents, queries, and metadata:\n",
        "* Documents: a collection of records with Id, Vectors (dense, sparse) and metadata\n",
        "* Queries: a collection of queries with Vectors (dense, sparse), metadata filter and top_k\n",
        "* Metadata: a definition of the dataset: name, dimension, metric, embedding models, etc."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 4,
      "metadata": {
        "id": "DCGFhTtyUaUt"
      },
      "outputs": [],
      "source": [
        "!pip install -qU \\\n",
        "  pinecone-client==2.2.2 \\\n",
        "  pinecone-datasets==0.6.0"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {
        "id": "S9NCQyTqUaUt"
      },
      "outputs": [],
      "source": [
        "from pinecone_datasets import Dataset, DatasetMetadata"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 6,
      "metadata": {
        "id": "Eaiy3IjIUaUt",
        "outputId": "4ff727bd-1a56-42bb-8cd2-e645b5ab390c",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "{'name': '',\n",
              " 'created_at': '2023-08-14 09:18:50.196514',\n",
              " 'documents': 0,\n",
              " 'queries': 0,\n",
              " 'source': None,\n",
              " 'license': None,\n",
              " 'bucket': None,\n",
              " 'task': None,\n",
              " 'dense_model': {'name': '', 'tokenizer': None, 'dimension': 0},\n",
              " 'sparse_model': None,\n",
              " 'description': None,\n",
              " 'tags': None,\n",
              " 'args': None}"
            ]
          },
          "metadata": {},
          "execution_count": 6
        }
      ],
      "source": [
        "# creating a new empty metadata\n",
        "metadata = DatasetMetadata.empty()\n",
        "metadata.dict()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 7,
      "metadata": {
        "id": "g_ACjKDOUaUt",
        "outputId": "bc47c7d1-a3ef-4cf1-9e4b-7da6f82e111c",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        }
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "  id           values                                      sparse_values  \\\n",
              "0  1  [0.1, 0.2, 0.3]  {'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}   \n",
              "1  2  [0.4, 0.5, 0.6]  {'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}   \n",
              "2  3  [0.7, 0.8, 0.9]  {'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}   \n",
              "3  4  [1.0, 1.1, 1.2]  {'indices': [10, 11, 12], 'values': [1.0, 1.1,...   \n",
              "4  5  [1.3, 1.4, 1.5]  {'indices': [13, 14, 15], 'values': [1.3, 1.4,...   \n",
              "\n",
              "                             metadata                                blob  \n",
              "0  {'title': 'title1', 'url': 'url1'}      {'extra_field': 'extra_value'}  \n",
              "1  {'title': 'title2', 'url': 'url2'}                                None  \n",
              "2  {'title': 'title3', 'url': 'url3'}                                None  \n",
              "3  {'title': 'title4', 'url': 'url4'}                                None  \n",
              "4  {'title': 'title5', 'url': 'url5'}  {'another_field': 'another_value'}  "
            ],
            "text/html": [
              "\n",
              "\n",
              "  <div id=\"df-7047aefe-be6d-423c-b810-75e31017f008\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>id</th>\n",
              "      <th>values</th>\n",
              "      <th>sparse_values</th>\n",
              "      <th>metadata</th>\n",
              "      <th>blob</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>1</td>\n",
              "      <td>[0.1, 0.2, 0.3]</td>\n",
              "      <td>{'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}</td>\n",
              "      <td>{'title': 'title1', 'url': 'url1'}</td>\n",
              "      <td>{'extra_field': 'extra_value'}</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>2</td>\n",
              "      <td>[0.4, 0.5, 0.6]</td>\n",
              "      <td>{'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}</td>\n",
              "      <td>{'title': 'title2', 'url': 'url2'}</td>\n",
              "      <td>None</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>3</td>\n",
              "      <td>[0.7, 0.8, 0.9]</td>\n",
              "      <td>{'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}</td>\n",
              "      <td>{'title': 'title3', 'url': 'url3'}</td>\n",
              "      <td>None</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>4</td>\n",
              "      <td>[1.0, 1.1, 1.2]</td>\n",
              "      <td>{'indices': [10, 11, 12], 'values': [1.0, 1.1,...</td>\n",
              "      <td>{'title': 'title4', 'url': 'url4'}</td>\n",
              "      <td>None</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>5</td>\n",
              "      <td>[1.3, 1.4, 1.5]</td>\n",
              "      <td>{'indices': [13, 14, 15], 'values': [1.3, 1.4,...</td>\n",
              "      <td>{'title': 'title5', 'url': 'url5'}</td>\n",
              "      <td>{'another_field': 'another_value'}</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-7047aefe-be6d-423c-b810-75e31017f008')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "\n",
              "\n",
              "\n",
              "    <div id=\"df-ba273d30-2d9c-43f2-b7d1-7b27125abb98\">\n",
              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-ba273d30-2d9c-43f2-b7d1-7b27125abb98')\"\n",
              "              title=\"Suggest charts.\"\n",
              "              style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "      </button>\n",
              "    </div>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "    background-color: #E8F0FE;\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: #1967D2;\n",
              "    height: 32px;\n",
              "    padding: 0 0 0 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: #E2EBFA;\n",
              "    box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: #174EA6;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "    background-color: #3B4455;\n",
              "    fill: #D2E3FC;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart:hover {\n",
              "    background-color: #434B5C;\n",
              "    box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "    filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "    fill: #FFFFFF;\n",
              "  }\n",
              "</style>\n",
              "\n",
              "    <script>\n",
              "      async function quickchart(key) {\n",
              "        const containerElement = document.querySelector('#' + key);\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      }\n",
              "    </script>\n",
              "\n",
              "      <script>\n",
              "\n",
              "function displayQuickchartButton(domScope) {\n",
              "  let quickchartButtonEl =\n",
              "    domScope.querySelector('#df-ba273d30-2d9c-43f2-b7d1-7b27125abb98 button.colab-df-quickchart');\n",
              "  quickchartButtonEl.style.display =\n",
              "    google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "}\n",
              "\n",
              "        displayQuickchartButton(document);\n",
              "      </script>\n",
              "      <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-7047aefe-be6d-423c-b810-75e31017f008 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-7047aefe-be6d-423c-b810-75e31017f008');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n"
            ]
          },
          "metadata": {},
          "execution_count": 7
        }
      ],
      "source": [
        "ds = Dataset.from_pandas(documents=df, q=None, metadata=metadata)\n",
        "ds.documents"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "CGzdg2sZUaUt"
      },
      "source": [
        "## Save dataset to local path\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 8,
      "metadata": {
        "id": "IVkK6fJUUaUt",
        "outputId": "943ff58d-91d6-4a75-e218-d833214fee1b",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.10/dist-packages/pinecone_datasets/dataset.py:433: UserWarning: Queries are empty, not saving queries\n",
            "  warnings.warn(\"Queries are empty, not saving queries\")\n"
          ]
        }
      ],
      "source": [
        "ds.to_path('/tmp/ds')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "B5tvJlnSUaUu"
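      },
      "source": [
        "### Reload dataset\n",
        "\n",
        "Load the dataset back from the same path with `Dataset.from_path`, then display its documents to compare them with the documents shown before saving."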
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 9,
      "metadata": {
        "id": "pLEhwSaRUaUu"
      },
      "outputs": [],
      "source": [
        "new_ds = Dataset.from_path('/tmp/ds')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 10,
      "metadata": {
        "id": "J5LJGYqxUaUu",
        "outputId": "120f1ebf-e30a-4913-a84f-727e52e2add8",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        }
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "  id           values                                      sparse_values  \\\n",
              "0  1  [0.1, 0.2, 0.3]  {'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}   \n",
              "1  2  [0.4, 0.5, 0.6]  {'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}   \n",
              "2  3  [0.7, 0.8, 0.9]  {'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}   \n",
              "3  4  [1.0, 1.1, 1.2]  {'indices': [10, 11, 12], 'values': [1.0, 1.1,...   \n",
              "4  5  [1.3, 1.4, 1.5]  {'indices': [13, 14, 15], 'values': [1.3, 1.4,...   \n",
              "\n",
              "                             metadata  \\\n",
              "0  {'title': 'title1', 'url': 'url1'}   \n",
              "1  {'title': 'title2', 'url': 'url2'}   \n",
              "2  {'title': 'title3', 'url': 'url3'}   \n",
              "3  {'title': 'title4', 'url': 'url4'}   \n",
              "4  {'title': 'title5', 'url': 'url5'}   \n",
              "\n",
              "                                                blob  \n",
              "0  {'another_field': None, 'extra_field': 'extra_...  \n",
              "1                                               None  \n",
              "2                                               None  \n",
              "3                                               None  \n",
              "4  {'another_field': 'another_value', 'extra_fiel...  "
            ],
            "text/html": [
              "\n",
              "\n",
              "  <div id=\"df-924edd68-488a-465d-825a-1743d1db0e66\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>id</th>\n",
              "      <th>values</th>\n",
              "      <th>sparse_values</th>\n",
              "      <th>metadata</th>\n",
              "      <th>blob</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>1</td>\n",
              "      <td>[0.1, 0.2, 0.3]</td>\n",
              "      <td>{'indices': [1, 2, 3], 'values': [0.1, 0.2, 0.3]}</td>\n",
              "      <td>{'title': 'title1', 'url': 'url1'}</td>\n",
              "      <td>{'another_field': None, 'extra_field': 'extra_...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>2</td>\n",
              "      <td>[0.4, 0.5, 0.6]</td>\n",
              "      <td>{'indices': [4, 5, 6], 'values': [0.4, 0.5, 0.6]}</td>\n",
              "      <td>{'title': 'title2', 'url': 'url2'}</td>\n",
              "      <td>None</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>3</td>\n",
              "      <td>[0.7, 0.8, 0.9]</td>\n",
              "      <td>{'indices': [7, 8, 9], 'values': [0.7, 0.8, 0.9]}</td>\n",
              "      <td>{'title': 'title3', 'url': 'url3'}</td>\n",
              "      <td>None</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>4</td>\n",
              "      <td>[1.0, 1.1, 1.2]</td>\n",
              "      <td>{'indices': [10, 11, 12], 'values': [1.0, 1.1,...</td>\n",
              "      <td>{'title': 'title4', 'url': 'url4'}</td>\n",
              "      <td>None</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>5</td>\n",
              "      <td>[1.3, 1.4, 1.5]</td>\n",
              "      <td>{'indices': [13, 14, 15], 'values': [1.3, 1.4,...</td>\n",
              "      <td>{'title': 'title5', 'url': 'url5'}</td>\n",
              "      <td>{'another_field': 'another_value', 'extra_fiel...</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-924edd68-488a-465d-825a-1743d1db0e66')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "\n",
              "\n",
              "\n",
              "    <div id=\"df-42148e02-48b4-4818-94f5-46b211ba40a7\">\n",
              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-42148e02-48b4-4818-94f5-46b211ba40a7')\"\n",
              "              title=\"Suggest charts.\"\n",
              "              style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "      </button>\n",
              "    </div>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "    background-color: #E8F0FE;\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: #1967D2;\n",
              "    height: 32px;\n",
              "    padding: 0 0 0 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: #E2EBFA;\n",
              "    box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: #174EA6;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "    background-color: #3B4455;\n",
              "    fill: #D2E3FC;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart:hover {\n",
              "    background-color: #434B5C;\n",
              "    box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "    filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "    fill: #FFFFFF;\n",
              "  }\n",
              "</style>\n",
              "\n",
              "    <script>\n",
              "      async function quickchart(key) {\n",
              "        const containerElement = document.querySelector('#' + key);\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      }\n",
              "    </script>\n",
              "\n",
              "      <script>\n",
              "\n",
              "function displayQuickchartButton(domScope) {\n",
              "  let quickchartButtonEl =\n",
              "    domScope.querySelector('#df-42148e02-48b4-4818-94f5-46b211ba40a7 button.colab-df-quickchart');\n",
              "  quickchartButtonEl.style.display =\n",
              "    google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "}\n",
              "\n",
              "        displayQuickchartButton(document);\n",
              "      </script>\n",
              "      <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-924edd68-488a-465d-825a-1743d1db0e66 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-924edd68-488a-465d-825a-1743d1db0e66');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n"
            ]
          },
          "metadata": {},
          "execution_count": 10
        }
      ],
      "source": [
        "new_ds.documents"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.9.6"
    },
    "orig_nbformat": 4,
    "colab": {
      "provenance": []
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}